* Maximum likelihood estimation using Stata's ml module (based on equation (11)).
* Here, theta1 is the theta in equation (11) and theta2 is the N.
* The log-likelihood function is the natural log of equation (11).
* A constant variable with value 1 is generated because it is the only way I have found to make the first equation return just a coefficient while also including dependent variables.
program agg_agg_ml_prog

	/*
	Likelihood evaluator for Stata's -ml- (reads the dependent variables from
	the $ML_y* globals and writes the observation-level log likelihood into
	the variable named by its first argument; presumably used with method lf
	-- confirm against the -ml model- call).

	The log likelihood is a multinomial over two-car accident type pairs:
		lnf = lnfactorial(sum a_i_j) - sum lnfactorial(a_i_j) + sum a_i_j*ln(p_i_j)
	with p_i_j proportional to N_i*N_j*(theta_i + theta_j), doubled when i != j.

	Dependent variables, in order, for K driver types:
		$ML_y1 .. $ML_yK        a_1 .. a_K (one-car accident counts by type)
		$ML_y(K+1) onward       a_1_1 a_1_2 .. a_1_K a_2_2 .. a_K_K
		                        (two-car accident counts, pairs with j >= i)
	i.e. K*(K+3)/2 dependent variables: 5 for K = 2, 9 for K = 3, 14 for K = 4.

	Arguments (after lnf), in order:
		theta_K_rel_1 .. theta_2_rel_1 lambda_K_rel_1 .. lambda_2_rel_1
	theta is for two-car accidents and lambda is for one-car accidents, each
	expressed relative to driver type 1.
	*/

	/* count the dependent variables that -ml- has been given */
	local n_dep 0
	local next 1
	while "${ML_y`next'}" != "" {
		local n_dep `next'
		local ++next
	}

	/* find the number of driver types K that satisfies K*(K+3)/2 == n_dep */
	local driver_types 0
	local k_try 2
	while `k_try'*(`k_try'+3)/2 <= `n_dep' {
		if `k_try'*(`k_try'+3)/2 == `n_dep' local driver_types `k_try'
		local ++k_try
	}
	if `driver_types' == 0 {
		/* the count fits no K: keep the historical rule (a 14th dependent variable means four types, otherwise two) */
		if "$ML_y14" != "" local driver_types 4
		else local driver_types 2
	}

	/* arguments over which maximization will occur, highest driver type first */
	local theta_args
	local lambda_args
	forvalues k = `driver_types'(-1)2 {
		local theta_args `theta_args' theta_`k'_rel_1
		local lambda_args `lambda_args' lambda_`k'_rel_1
	}
	args lnf `theta_args' `lambda_args'

	/* temporary variables */
	/* for example, N41 is the ratio of the number of drivers of type 4 relative to the number of drivers of type 1 */
	tempvar theta_1_rel_1 p_denom acc_sum
	forvalues i = 1/`driver_types' {
		tempvar a_`i' N`i'1
		forvalues j = `i'/`driver_types' {
			/* j starts at i in order to eliminate duplicates in terms of combinations */
			tempvar a_`i'_`j' p_`i'_`j'
		}
	}

	quietly {
		/* copy the dependent variables: one-car counts first, then the two-car pairs */
		/* (done inside -quietly- so that missing-value notes are not printed at every evaluation) */
		local dep 0
		forvalues i = 1/`driver_types' {
			local ++dep
			gen double `a_`i'' = ${ML_y`dep'}
		}
		forvalues i = 1/`driver_types' {
			forvalues j = `i'/`driver_types' {
				local ++dep
				gen double `a_`i'_`j'' = ${ML_y`dep'}
			}
		}

		/* assign values to the reference groups */
		gen double `N11' = 1
		gen double `theta_1_rel_1' = 1

		/* incorporate information from single car crashes using the fact that larger observed quantities of one type suggest more drivers on the road of that type */
		forvalues k = 2/`driver_types' {
			gen double `N`k'1' = (1/`lambda_`k'_rel_1')*(`a_`k''/`a_1')
		}

		/* build the probability values for accidents of each type */
		/* first build the probability denominator (sum over all ordered pairs) */
		gen double `p_denom' = 0
		forvalues k = 1/`driver_types' {
			forvalues l = 1/`driver_types' {
				replace `p_denom' = `p_denom' + `N`k'1'*`N`l'1'*(`theta_`k'_rel_1'+`theta_`l'_rel_1')
			}
		}

		/* next build the set of probabilities (unordered pairs only) */
		forvalues i = 1/`driver_types' {
			forvalues j = `i'/`driver_types' {
				gen double `p_`i'_`j'' = `N`i'1'*`N`j'1'*(`theta_`i'_rel_1'+`theta_`j'_rel_1')/`p_denom'
				/* after eliminating the duplicates, need to add in the probability of observing the two types reversed */
				if `i' != `j' replace `p_`i'_`j'' = 2*`p_`i'_`j''
			}
		}

		/* finally construct the likelihood function */
		/* total number of two-car accidents */
		gen double `acc_sum' = 0
		forvalues i = 1/`driver_types' {
			forvalues j = `i'/`driver_types' {
				replace `acc_sum' = `acc_sum' + `a_`i'_`j''
			}
		}

		/* multinomial coefficient: ln(total!) - sum of ln(count!) */
		replace `lnf' = lnfactorial(`acc_sum')
		forvalues i = 1/`driver_types' {
			forvalues j = `i'/`driver_types' {
				replace `lnf' = `lnf' - lnfactorial(`a_`i'_`j'')
			}
		}

		/* probability terms: sum of count*ln(probability) */
		forvalues i = 1/`driver_types' {
			forvalues j = `i'/`driver_types' {
				replace `lnf' = `lnf' + `a_`i'_`j''*ln(`p_`i'_`j'')
			}
		}

		/* disabled alternative term, kept for reference */
		*replace `lnf' = `lnf' + ln((`a_2'/`a_1') - `N21'*`lambda_2_rel_1')
	}
end
